# Who Cares if You Vote? Partisan agreement and injunctive norms of voting
# Edward Fieldhouse (1), David Cutts (2), Jack Bailey (1)
# (1) The University of Manchester, (2) The University of Birmingham


# 1. Housekeeping ---------------------------------------------------------

# The following code requires that you have first opened the associated
# project file. If not, click the button on the top right and open it
# ("Open Project...").

# Note also that the Bayesian methods we use below rely on having installed
# the probabilistic programming language Stan. If you have yet to install
# Stan, please see https://mc-stan.org/users/interfaces/.

# Load packages

library(tidyverse)
library(tidybayes)
library(magrittr)
library(jbmisc)   # devtools::install_github("jackobailey/jbmisc)
library(haven)
library(brms)
library(here)


# Load data from the British Election Study Internet Panel. Note: If you
# plan to use this data, please make sure that you download the most recent
# version available at https://www.britishelectionstudy.com.

dta <- read_sav(here("_data", "BES2015_W2_v6.9.sav.zip"))



# 2. Transform data -------------------------------------------------------

# Select variables

dta <-
  dta %>%
  select(
    # Respondent-level covariates
    wt,
    age = Age,
    female = gender,
    edu = edlevel,
    edu2 = education,
    married = marital,
    discuss = discussPolDays,
    att = polAttention,
    no_und = efficacyNotUnderstand,
    pol_care = efficacyPolCare,
    duty = dutyToVote2,
    pid = partyId,
    sq = partyIdSqueeze,
    str = partyIdStrength,
    turn = euroTurnoutRetro,
    
    # Discussant-level covariates
    rel = num_range("relationshipName", 1:3),
    app = num_range("discussantApprovalVoteName", 1:3),
    vote = num_range("discussantVoteName", 1:3),
    pol = num_range("discussPolDaysD", 1:3)
  )


# First, we'll deal with all of the respondent-level variables. To get started
# we'll start with age, which we're going to convert to z-scores to help the
# Bayesian models we fit later converge.

dta$age <-
  dta$age %>% 
  mark_na(-8:-9) %>% 
  as.numeric() %>% 
  rescale(type = "z")


# Next, we'll convert the "female" variable to a factor that takes the value 1
# where the respondent is female and 0 where they are male.

dta$female <-
  dta$female %>%
  as_factor()


# We now need to deal with our two education variables. We need "edu2" only as
# it contains information on who has some other qualification and whose are not
# known. We'll add these categories to "edu1", then populate them from "edu2".

dta$edu <-
  dta$edu %>%
  as_factor() %>%
  factor(levels = c("No qualifications",
                    "Below GCSE",
                    "GCSE",
                    "A-level",
                    "Undergraduate",
                    "Postgrad",
                    "Other",
                    "Unknown"),
         labels = c("None",
                    "Below GCSE",
                    "GCSE",
                    "A-level",
                    "Undergraduate",
                    "Postgraduate",
                    "Other",
                    "Unknown"))

dta$edu[dta$edu2 == 18] <- "Other"
dta$edu[is.na(dta$edu) == T] <- "Unknown"


# Now we'll move on to marital status. We want to convert this to a new variable
# that tells us if the respondent is cohabiting. This is because we expect similar
# "marriage" effects for those who are married, living as married, or in civil
# partnerships.

dta$cohabit <-
  dta$married %>%
  as_factor() %>%
  as_dummy(c("Married", "Living as married", "Civil Partnership")) %>%
  factor(labels = c("Other",
                    "Cohabiting"))


# There are now a host of attitudinal variables that we must deal with. These each
# have roughly the same issues. And, in most cases, we can simply mark "Don't know"
# cases as missing, convert them to numeric, and include them in our model. We'll
# do them all at once.

dta$discuss <-
  dta$discuss %>%
  as_factor() %>% 
  mark_na("Don't know") %>%
  as.numeric() %>% 
  subtract(1)


dta$att <-
  dta$att %>%
  as_factor() %>% 
  mark_na("Don't know") %>%
  as.numeric() %>% 
  subtract(1)


dta$no_und <-
  dta$no_und %>%
  as_factor() %>% 
  mark_na("Don't know") %>%
  as.numeric() %>%
  subtract(1)


dta$pol_care <-
  dta$pol_care %>%
  as_factor() %>% 
  mark_na("Don't know") %>%
  as.numeric() %>%
  subtract(1)


dta$duty <- 
  dta$duty %>% 
  as_factor() %>% 
  mark_na("Don't know") %>% 
  as.numeric() %>% 
  subtract(1)


# Now we have to convert the turnout variable to a dummy that takes the value 1
# where the respondent said that they voted and 0 otherwise.

dta$turn <-
  dta$turn %>%
  as_factor() %>% 
  as_dummy("Yes, I voted") %>%
  factor(labels = c("Didn't vote", "Voted"))


# Next we have to create a party identification variable that combines the two
# pid variables in our data. First, we use the responses from the squeeze variable
# "sq" to populate our main pid variable where respondents said that they identified
# with no party (10) or did not know (9999). We then convert the resulting variable
# to a factor so that we can include it in our model.

dta$pid[dta$pid %in% c(10, 9999)] <- dta$sq[dta$pid %in% c(10, 9999)]


# Party ID

dta$pid <-
  dta$pid %>%
  as_factor() %>%
  droplevels() %>% 
  factor(labels = c("Conservative",
                    "Labour",
                    "Liberal Democrat",
                    "SNP",
                    "Plaid Cymru",
                    "UKIP",
                    "Green Party",
                    "BNP",
                    "Other",
                    "None",
                    "Don't know"))


# Finally for our respondent-level data, we need to recode the party identification strength
# variable to include those respondents with no party identification, which we'll also set
# as the reference category. We'll also mark any cases where the respondent says that they
# "Don't know" how strongly they identified as NA.

dta$str <- 
  dta$str %>% 
  as_factor() %>% 
  mark_na("Don't know") %>% 
  fct_explicit_na("None") %>% 
  fct_relevel("None")



# Next, we must move on to the discussant-level covariates. These are tricky, so the code might
# get quite long below. First, we'll start by converting the dyadic relationship variables to
# factors using the lapply() function.

dta[, paste0("rel", 1:3)] <-
  dta[, paste0("rel", 1:3)] %>%
  lapply(as_factor)


# Now we'll convert the discussants' voting variable so that it takes the same values as the
# respondent party identification variable.

dta[, paste0("vote", 1:3)] <-
  dta[, paste0("vote", 1:3)] %>%
  lapply(as_factor) %>%
  lapply(droplevels) %>% 
  lapply(factor, labels = c("Labour",
                            "Conservative",
                            "Liberal Democrat",
                            "SNP",
                            "Plaid Cymru",
                            "Green Party",
                            "UKIP",
                            "BNP",
                            "Other",
                            "None",
                            "Don't know"))


# Next, we need to create a count of discussants for each respondent. We can do this by adding
# up how many NAs the respondent has across the 3 "rel" columns, then subtracting the answer
# from 3 -- the maximum number of discussants.

dta$n_disc <- 3 - rowSums(is.na(dta[, paste0("rel", 1:3)]) == T)


# We'll now convert the data to data frame format to make the below possible.

dta <- dta %>% data.frame()


# One complicating factor is that some respondents report fewer than 3 discussants, but do not
# necessary put those that they do report in the first slot. Sometimes, for example, respondents
# with 2 discussants will use the 2nd and 3rd slot, or the 1st and 3rd slot. We need to deal with
# this and make sure that where the respondent has fewer than 3 discussants, they always appear in
# the respective column. E.g. a respondent with 1 discussant will have responses in only column 1
# and a respondent with 2 discussants will have responses in only columns 1 and 2. As you can see
# from the output below this is not currently the case. This takes the form of a lot of recoding.

dta[, paste0("rel", 1:3)][dta$n_disc == 2 & is.na(dta$rel1) == T, ] <-
  tibble(rel1 = dta$rel2[dta$n_disc == 2 & is.na(dta$rel1) == T],
         rel2 = dta$rel3[dta$n_disc == 2 & is.na(dta$rel1) == T],
         rel3 = NA)

dta[, paste0("vote", 1:3)][dta$n_disc == 2 & is.na(dta$vote1) == T, ] <-
  tibble(vote1 = dta$vote2[dta$n_disc == 2 & is.na(dta$vote1) == T],
         vote2 = dta$vote3[dta$n_disc == 2 & is.na(dta$vote1) == T],
         vote3 = NA)

dta[, paste0("app", 1:3)][dta$n_disc == 2 & is.na(dta$app1) == T, ] <-
  tibble(app1 = dta$app2[dta$n_disc == 2 & is.na(dta$app1) == T],
         app2 = dta$app3[dta$n_disc == 2 & is.na(dta$app1) == T],
         app3 = NA)

dta[, paste0("rel", 1:3)][dta$n_disc == 2 & is.na(dta$rel2) == T, ] <-
  tibble(rel1 = dta$rel1[dta$n_disc == 2 & is.na(dta$rel2) == T],
         rel2 = dta$rel3[dta$n_disc == 2 & is.na(dta$rel2) == T],
         rel3 = NA)

dta[, paste0("vote", 1:3)][dta$n_disc == 2 & is.na(dta$vote2) == T, ] <-
  tibble(vote1 = dta$vote1[dta$n_disc == 2 & is.na(dta$vote2) == T],
         vote2 = dta$vote3[dta$n_disc == 2 & is.na(dta$vote2) == T],
         vote3 = NA)

dta[, paste0("app", 1:3)][dta$n_disc == 2 & is.na(dta$app2) == T, ] <-
  tibble(app1 = dta$app1[dta$n_disc == 2 & is.na(dta$app2) == T],
         app2 = dta$app3[dta$n_disc == 2 & is.na(dta$app2) == T],
         app3 = NA)

dta[, paste0("rel", 1:3)][dta$n_disc == 1 & is.na(dta$rel1) == T & is.na(dta$rel2) == T, ] <-
  tibble(rel1 = dta$rel3[dta$n_disc == 1 & is.na(dta$rel1) == T & is.na(dta$rel2) == T],
         rel2 = NA,
         rel3 = NA)

dta[, paste0("vote", 1:3)][dta$n_disc == 1 & is.na(dta$vote1) == T & is.na(dta$vote2) == T, ] <-
  tibble(vote1 = dta$vote3[dta$n_disc == 1 & is.na(dta$vote1) == T & is.na(dta$vote2) == T],
         vote2 = NA,
         vote3 = NA)

dta[, paste0("app", 1:3)][dta$n_disc == 1 & is.na(dta$app1) == T & is.na(dta$app2) == T, ] <-
  tibble(app1 = dta$app3[dta$n_disc == 1 & is.na(dta$app1) == T & is.na(dta$app2) == T],
         app2 = NA,
         app3 = NA)

dta[, paste0("rel", 1:3)][dta$n_disc == 1 & is.na(dta$rel1) == T & is.na(dta$rel3) == T, ] <-
  tibble(rel1 = dta$rel2[dta$n_disc == 1 & is.na(dta$rel1) == T & is.na(dta$rel3) == T],
         rel2 = NA,
         rel3 = NA)

dta[, paste0("vote", 1:3)][dta$n_disc == 1 & is.na(dta$vote1) == T & is.na(dta$vote3) == T, ] <-
  tibble(vote1 = dta$vote2[dta$n_disc == 1 & is.na(dta$vote1) == T & is.na(dta$vote3) == T],
         vote2 = NA,
         vote3 = NA)

dta[, paste0("app", 1:3)][dta$n_disc == 1 & is.na(dta$app1) == T & is.na(dta$app3) == T, ] <-
  tibble(app1 = dta$app2[dta$n_disc == 1 & is.na(dta$app1) == T & is.na(dta$app3) == T],
         app2 = NA,
         app3 = NA)


# Because of the way that the multimembership models work, we cannot have any empty columns.
# This is difficult where respondents have only 1 or 2 discussants. Fortunately, this type
# of model does include weights for the multimembership structure. As such, where we have
# missing data, we can repeat discussants across multiple columns but weight them to sum to
# only a single individual. For example, where a respondent has 3 discussants, each column
# has a value and each discussant receives a weight of 1. Where a respondent has only 2
# discussants, we repeat the second instance across the third column then assign each column
# weights of 1, .5, and .5, which sum to 2 -- the number of discussants.

dta[, c("rel3", "vote3", "app3")][dta$n_disc == 2, ] <- 
  dta[, c("rel2", "vote2", "app2")][dta$n_disc == 2, ]

dta[, c("rel2", "vote2", "app2")][dta$n_disc == 1, ] <-
  dta[, c("rel1", "vote1", "app1")][dta$n_disc == 1, ]

dta[, c("rel3", "vote3", "app3")][dta$n_disc == 1, ] <-
  dta[, c("rel1", "vote1", "app1")][dta$n_disc == 1, ]


# Now we'll create the weighting variables. As well as count *how many* discussants each
# respondent has, these also account for how often they discussed politics with them. We
# use the "pol" variables to do this, then create the weights where respondents have 3,
# 2, and 1 discussants.

dta[, paste0("w", 1:3)] <- NA

dta[, paste0("pol", 1:3)] <- 
  dta[, paste0("pol", 1:3)] %>% 
  lapply(as_factor) %>% 
  lapply(mark_na, "Don't know") %>% 
  lapply(as.numeric) %>% 
  lapply(subtract, 1) %>% 
  lapply(divide_by, 7)

dta[, paste0("w", 1:3)][dta$n_disc == 3, ] <- 
  dta[, paste0("pol", 1:3)][dta$n_disc == 3, ]

dta[, paste0("w", 1:3)][dta$n_disc == 2, ] <- 
  tibble(w1 = dta$pol1[dta$n_disc == 2],
         w2 = dta$pol2[dta$n_disc == 2]/2,
         w3 = dta$pol2[dta$n_disc == 2]/2)

dta[, paste0("w", 1:3)][dta$n_disc == 1, ] <-
  tibble(w1 = dta$pol1[dta$n_disc == 1]/3,
         w2 = dta$pol1[dta$n_disc == 1]/3,
         w3 = dta$pol1[dta$n_disc == 1]/3)


# Finally, we need to recode our main independent variable -- discussant approval.
# This is a little bit tricky too. First, we'll recode these variables into dummy
# variables that take the value 1 where the respondent said that the discussant
# would care if they voted and 0 otherwise. We will include the original "app"
# variables at the 2nd level, but need to create a new variable to capture the
# main effect at the individual level while accounting for the number of discussants
# that each respondent has. We do this by calculating the weighted average of the
# "app" variables.

dta[, paste0("app", 1:3)] <-
  dta[, paste0("app", 1:3)] %>%
  lapply(as_factor) %>% 
  lapply(as_dummy, "Yes")

dta <-
  dta %>% 
  mutate(
    app_fix1 = app1*w1,
    app_fix2 = app2*w2,
    app_fix3 = app3*w3
  )

dta$app_fix <- rowMeans(dta[, paste0("app_fix", 1:3)])


# Another tricky bit, we need to create new variables that tell us about the nature of
# each respondents-discussant dyad's partisanship. We can do this using the case_when()
# function.

opt <- c("None", "Don't know")

dta <-
  dta %>%
  mutate(
    prt1 = 
      case_when(
        pid == vote1 & !(pid %in% opt) & !(vote1 %in% opt) & pid != "Other" & vote1 != "Other" & is.na(rel1) == F ~ "Shared Partisanship",
        pid != vote1 & !(pid %in% opt) & !(vote1 %in% opt) & is.na(rel1) == F ~ "Opposing Partisanship",
        pid == "Other" & vote1 == "Other" & is.na(rel1) == F ~ "Opposing Partisanship",
        pid %in% opt & !(vote1 %in% opt) & is.na(rel1) == F ~ "Respondent Non-partisan",
        vote1 %in% opt & !(pid %in% opt) & is.na(rel1) == F ~ "Discussant Non-partisan",
        pid %in% opt & vote1 %in% opt & is.na(rel1) == F ~ "Shared Non-partisan"),
    prt2 = 
      case_when(
        pid == vote2 & !(pid %in% opt) & !(vote2 %in% opt) & pid != "Other" & vote2 != "Other" & is.na(rel2) == F ~ "Shared Partisanship",
        pid != vote2 & !(pid %in% opt) & !(vote2 %in% opt) & is.na(rel2) == F ~ "Opposing Partisanship",
        pid == "Other" & vote2 == "Other" & is.na(rel2) == F ~ "Opposing Partisanship",
        pid %in% opt & !(vote2 %in% opt) & is.na(rel2) == F ~ "Respondent Non-partisan",
        vote2 %in% opt & !(pid %in% opt) & is.na(rel2) == F ~ "Discussant Non-partisan",
        pid %in% opt & vote2 %in% opt & is.na(rel2) == F ~ "Shared Non-partisan"),
    prt3 =
      case_when(
        pid == vote3 & !(pid %in% opt) & !(vote3 %in% opt) & pid != "Other" & vote3 != "Other" & is.na(rel3) == F ~ "Shared Partisanship",
        pid != vote3 & !(pid %in% opt) & !(vote3 %in% opt) & is.na(rel3) == F ~ "Opposing Partisanship",
        pid == "Other" & vote3 == "Other" & is.na(rel3) == F ~ "Opposing Partisanship",
        pid %in% opt & !(vote3 %in% opt) & is.na(rel3) == F ~ "Respondent Non-partisan",
        vote3 %in% opt & !(pid %in% opt) & is.na(rel3) == F ~ "Discussant Non-partisan",
        pid %in% opt & vote3 %in% opt & is.na(rel3) == F ~ "Shared Non-partisan")
  )


# Finally, we'll select only the variables we need for our analysis, then remove any missing
# data using listwise deletion.

dta <-
  dta %>%
  select(wt,
         age,
         female,
         edu,
         cohabit,
         discuss,
         att,
         no_und,
         pol_care,
         duty,
         str,
         turn,
         app_fix,
         num_range("rel", 1:3),
         num_range("app", 1:3),
         num_range("prt", 1:3),
         num_range("w", 1:3)) %>% 
  na.omit()



# 3. Fit models -----------------------------------------------------------

# First, we'll fit the turnout model without civic duty

m1 <- 
  brm(
    formula =
      turn | weights(wt) ~ 1 + age + female + edu + cohabit + discuss + att + no_und + pol_care + str + app_fix +
      (1 + mmc(app1, app2, app3) | mm(prt1, prt2, prt3, weights = cbind(w1, w2, w3), scale = F)),
    family =
      bernoulli(link = "logit"),
    prior = 
      prior(normal(0, 1.5), class = "Intercept") +
      prior(normal(0, .1), class = "b") +
      prior(exponential(2), class = "sd") +
      prior(lkj(2), class = "cor"),
    data = dta,
    iter = 2000,
    chains = 4,
    cores = 4,
    control = list(adapt_delta = .99),
    file = here("_output", "w2_m1")
  )

summary(m1)
ranef(m1)


# Then, we'll fit the vote model *with* civic duty.

m2 <- 
  brm(
    formula =
      turn | weights(wt) ~ 1 + age + female + edu + cohabit + discuss + att + no_und + pol_care + duty + str + app_fix +
      (1 + mmc(app1, app2, app3) | mm(prt1, prt2, prt3, weights = cbind(w1, w2, w3), scale = F)),
    family =
      bernoulli(link = "logit"),
    prior = 
      prior(normal(0, 1.5), class = "Intercept") +
      prior(normal(0, .1), class = "b") +
      prior(exponential(2), class = "sd") +
      prior(lkj(2), class = "cor"),
    data = dta,
    iter = 2000,
    chains = 4,
    cores = 4,
    control = list(adapt_delta = .99),
    file = here("_output", "w2_m2")
  )

summary(m2)
ranef(m2)


# Third, we'll fit the civic duty model.

m3 <- 
  brm(
    formula =
      duty | weights(wt) ~ 1 + age + female + edu + cohabit + discuss + att + no_und + pol_care + str + app_fix +
      (1 + mmc(app1, app2, app3) | mm(prt1, prt2, prt3, weights = cbind(w1, w2, w3), scale = F)),
    family =
      gaussian(),
    prior = 
      prior(normal(2, 1), class = "Intercept") +
      prior(normal(0, 1), class = "b") +
      prior(exponential(5), class = "sd") +
      prior(lkj(2), class = "cor"),
    data = dta,
    iter = 2000,
    chains = 4,
    cores = 4,
    control = list(adapt_delta = .99),
    file = here("_output", "w2_m3")
  )

summary(m3)
ranef(m3)



# 4. Create predicted probability plots -----------------------------------

# We're going to create three different plots, one predicted probability plot
# for each model. These are a little complicated, as they involve creating new
# counterfactual datasets that hold all variables other than those we really
# care about at their mean or their reference categories.

# First, we'll compute the predicted probabilities for the first model.

probs <- data.frame(prt = NA,
                    prob_app1 = NA,
                    prob_app0 = NA)

for(i in unique(m1$data$prt1)){
  
  # Compute predicted probabilities where app == 1
  p_app1 <- fitted(m1,
                   summary = FALSE,
                   newdata = data.frame(age = 0,
                                        female = "Male",
                                        edu = "None",
                                        cohabit = "Other",
                                        discuss = mean(dta$discuss),
                                        att = mean(dta$att),
                                        no_und = mean(dta$no_und),
                                        pol_care = mean(dta$discuss),
                                        str = "None",
                                        app_fix = 1,
                                        prt1 = i,
                                        prt2 = i,
                                        prt3 = i,
                                        app1 = 1,
                                        app2 = 1,
                                        app3 = 1,
                                        w1 = 1/3,
                                        w2 = 1/3,
                                        w3 = 1/3))
  
  # Compute predicted probabilities where app == 0
  p_app0 <- fitted(m1,
                   summary = FALSE,
                   newdata = data.frame(age = 0,
                                        female = "Male",
                                        edu = "None",
                                        cohabit = "Other",
                                        discuss = mean(dta$discuss),
                                        att = mean(dta$att),
                                        no_und = mean(dta$no_und),
                                        pol_care = mean(dta$discuss),
                                        str = "None",
                                        app_fix = 0,
                                        prt1 = i,
                                        prt2 = i,
                                        prt3 = i,
                                        app1 = 0,
                                        app2 = 0,
                                        app3 = 0,
                                        w1 = 1/3,
                                        w2 = 1/3,
                                        w3 = 1/3))
  
  # Add to data frame
  p <- data.frame(prt = i,
                  prob_app1 = as.numeric(p_app1),
                  prob_app0 = as.numeric(p_app0))
  
  # Bind to previous predictions
  probs <- rbind(probs,
                 p)
  
}


# Next, we'll remove any rows that include NAs.

probs <-
  probs %>%
  na.omit()


# Next, we'll reorder the dyadic partisanship factor so that appears on the plot in
# the right order.

probs$prt <- 
  probs$prt %>% 
  factor(levels = c("Shared Non-partisan",
                    "Discussant Non-partisan",
                    "Respondent Non-partisan",
                    "Opposing Partisanship",
                    "Shared Partisanship"),
         labels = c("Shared Nonpartisan",
                    "Discussant Nonpartisan",
                    "Respondent Nonpartisan",
                    "Opposing Partisanship",
                    "Shared Partisanship"))


# Then, we'll compute contrasts for each row
probs$cont <- probs$prob_app1 - probs$prob_app0


# Next, we'll convert everything to long format so that we can plot the predicted
# probabilities and the contrasts on seperate facets.

probs <- 
  probs %>% 
  pivot_longer(
    cols = c(prob_app1, prob_app0, cont)
  ) %>% 
  mutate(
    panel =
      case_when(
        name %in% c("prob_app1", "prob_app0") ~ "Predicted Probability",
        TRUE ~ "Predicted Contrast"
      ) %>% 
      factor(
        levels = c("Predicted Probability", "Predicted Contrast")
      ),
    name =
      name %>% 
      factor(
        levels = c("prob_app0", "prob_app1", "cont"),
        labels = c("Do not approve", "Approve", "Contrast")
      )
  )


# We'll also summarise these data to provide some useful labels

prob_labs <- 
  probs %>% 
  group_by(prt, name, panel) %>% 
  summarise(value = median(value))


# Now that we have the necessary data, we can plot these results using ggplot()

plot1 <- 
  probs %>% 
  ggplot(
    aes(
      y = prt,
      x = value,
      slab_fill = name,
      slab_colour = name
    )
  ) +
  facet_wrap(~panel) +
  stat_halfeye(.width = .95,
                point_size = 1,
                interval_size = 0.5,
                slab_size = 0.5,
                height = .7,
                normalize = "groups") +
  stat_summary(geom = "point",
               fun = "median",
               colour = "white",
               size = 0.3) +
  geom_text(data = prob_labs,
            aes(label = scales::percent(value,
                                        accuracy = 0.1,
                                        suffix = ""),
                x = value + .002,
                y = prt),
            family = "Cabin",
            size = 2,
            vjust = 2,
            hjust = 0.7,
            inherit.aes = FALSE) +
  scale_y_discrete(labels = str_wrap(levels(probs$prt), width = 8)) +
  scale_x_continuous(breaks = seq(0, 1, by = .2),
                     labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(xlim = c(0, 1)) +
  labs(x = "Prob. of Voting",
       colour = "",
       fill = "") +
  scale_colour_manual(values = c("#D85A00", "#2368B2", "grey40"),
                      aesthetics = "slab_colour") +
  scale_fill_manual(values = c("#E28745", "#5F91C7", "grey60"),
                    aesthetics = "slab_fill") +
  theme_bailey() +
  theme(legend.position = "bottom",
        legend.margin = margin(t = 0, b = 0, unit = "cm"),
        legend.title = element_blank(),
        legend.text = element_text(size = rel(1)),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        axis.text.y = element_text(hjust= .5))


# Now we can render the plot as a jpeg
jpeg(file = here("_output", "w2_plot1.jpeg"),
     res = 300,
     width = 5.97,
     height = 3.69,
     units = "in")
plot1
dev.off()


# Finally, we'll compute the difference in difference
# between shared and opposing partisan dyads.

(probs$value[probs$prt == "Shared Partisanship" & probs$name == "Contrast"] -
    probs$value[probs$prt == "Opposing Partisanship" & probs$name == "Contrast"]) %>% 
  quantile(probs = c(.5, .025, .975)) %>% 
  round(2)




# Next, we'll compute the predicted probabilities for the second model

probs <- data.frame(prt = NA,
                    prob_app1 = NA,
                    prob_app0 = NA)

for(i in unique(m1$data$prt1)){
  
  # Compute predicted probabilities where app == 1
  p_app1 <- fitted(m2,
                   summary = FALSE,
                   newdata = data.frame(age = 0,
                                        female = "Male",
                                        edu = "None",
                                        cohabit = "Other",
                                        discuss = mean(dta$discuss),
                                        att = mean(dta$att),
                                        no_und = mean(dta$no_und),
                                        pol_care = mean(dta$discuss),
                                        duty = mean(dta$duty),
                                        str = "None",
                                        app_fix = 1,
                                        prt1 = i,
                                        prt2 = i,
                                        prt3 = i,
                                        app1 = 1,
                                        app2 = 1,
                                        app3 = 1,
                                        w1 = 1/3,
                                        w2 = 1/3,
                                        w3 = 1/3))
  
  # Compute predicted probabilities where app == 0
  p_app0 <- fitted(m2,
                   summary = FALSE,
                   newdata = data.frame(age = 0,
                                        female = "Male",
                                        edu = "None",
                                        cohabit = "Other",
                                        discuss = mean(dta$discuss),
                                        att = mean(dta$att),
                                        no_und = mean(dta$no_und),
                                        pol_care = mean(dta$discuss),
                                        duty = mean(dta$duty),
                                        str = "None",
                                        app_fix = 0,
                                        prt1 = i,
                                        prt2 = i,
                                        prt3 = i,
                                        app1 = 0,
                                        app2 = 0,
                                        app3 = 0,
                                        w1 = 1/3,
                                        w2 = 1/3,
                                        w3 = 1/3))
  
  # Add to data frame
  p <- data.frame(prt = i,
                  prob_app1 = as.numeric(p_app1),
                  prob_app0 = as.numeric(p_app0))
  
  # Bind to previous predictions
  probs <- rbind(probs,
                 p)
  
}


# Next, we'll remove any rows that include NAs.

probs <-
  probs %>%
  na.omit()


# Next, we'll reorder the dyadic partisanship factor so that appears on the plot in
# the right order.

probs$prt <- 
  probs$prt %>% 
  factor(levels = c("Shared Non-partisan",
                    "Discussant Non-partisan",
                    "Respondent Non-partisan",
                    "Opposing Partisanship",
                    "Shared Partisanship"),
         labels = c("Shared Nonpartisan",
                    "Discussant Nonpartisan",
                    "Respondent Nonpartisan",
                    "Opposing Partisanship",
                    "Shared Partisanship"))


# Then, we'll compute contrasts for each row
probs$cont <- probs$prob_app1 - probs$prob_app0


# Next, we'll convert everything to long format so that we can plot the predicted
# probabilities and the contrasts on seperate facets.

probs <- 
  probs %>% 
  pivot_longer(
    cols = c(prob_app1, prob_app0, cont)
  ) %>% 
  mutate(
    panel =
      case_when(
        name %in% c("prob_app1", "prob_app0") ~ "Predicted Probability",
        TRUE ~ "Predicted Contrast"
      ) %>% 
      factor(
        levels = c("Predicted Probability", "Predicted Contrast")
      ),
    name =
      name %>% 
      factor(
        levels = c("prob_app0", "prob_app1", "cont"),
        labels = c("Do not approve", "Approve", "Contrast")
      )
  )


# We'll also summarise these data to provide some useful labels

prob_labs <- 
  probs %>% 
  group_by(prt, name, panel) %>% 
  summarise(value = median(value))


# Now that we have the necessary data, we can plot these results using ggplot()

plot2 <- 
  probs %>% 
  ggplot(
    aes(
      y = prt,
      x = value,
      slab_fill = name,
      slab_colour = name
    )
  ) +
  facet_wrap(~panel) +
  stat_halfeye(.width = .95,
                point_size = 1,
                interval_size = 0.5,
                slab_size = 0.5,
                height = .7,
                normalize = "groups") +
  stat_summary(geom = "point",
               fun = "median",
               colour = "white",
               size = 0.3) +
  geom_text(data = prob_labs,
            aes(label = scales::percent(value,
                                        accuracy = 0.1,
                                        suffix = ""),
                x = value + .002,
                y = prt),
            family = "Cabin",
            size = 2,
            vjust = 2,
            hjust = 0.7,
            inherit.aes = FALSE) +
  scale_y_discrete(labels = str_wrap(levels(probs$prt), width = 8)) +
  scale_x_continuous(breaks = seq(0, 1, by = .2),
                     labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(xlim = c(0, 1)) +
  labs(x = "Prob. of Voting",
       colour = "",
       fill = "") +
  scale_colour_manual(values = c("#D85A00", "#2368B2", "grey40"),
                      aesthetics = "slab_colour") +
  scale_fill_manual(values = c("#E28745", "#5F91C7", "grey60"),
                    aesthetics = "slab_fill") +
  theme_bailey() +
  theme(legend.position = "bottom",
        legend.margin = margin(t = 0, b = 0, unit = "cm"),
        legend.title = element_blank(),
        legend.text = element_text(size = rel(1)),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        axis.text.y = element_text(hjust= .5))


# Now we can render the plot as a jpeg
jpeg(file = here("_output", "w2_plot2.jpeg"),
     res = 300,
     width = 5.97,
     height = 3.69,
     units = "in")
plot2
dev.off()


# Again, we'll compute the difference in difference too

(probs$value[probs$prt == "Shared Partisanship" & probs$name == "Contrast"] -
    probs$value[probs$prt == "Opposing Partisanship" & probs$name == "Contrast"]) %>% 
  quantile(probs = c(.5, .025, .975)) %>% 
  round(2)



# Finally, we'll do the same for the third model

probs <- data.frame(prt = NA,
                    prob_app1 = NA,
                    prob_app0 = NA)

for(i in unique(m3$data$prt1)){
  
  # Compute predicted probabilities where app == 1
  p_app1 <- fitted(m3,
                   summary = FALSE,
                   newdata = data.frame(age = 0,
                                        female = "Male",
                                        edu = "None",
                                        cohabit = "Other",
                                        discuss = mean(dta$discuss),
                                        att = mean(dta$att),
                                        no_und = mean(dta$no_und),
                                        pol_care = mean(dta$discuss),
                                        str = "None",
                                        app_fix = 1,
                                        prt1 = i,
                                        prt2 = i,
                                        prt3 = i,
                                        app1 = 1,
                                        app2 = 1,
                                        app3 = 1,
                                        w1 = 1/3,
                                        w2 = 1/3,
                                        w3 = 1/3))
  
  # Compute predicted probabilities where app == 0
  p_app0 <- fitted(m3,
                   summary = FALSE,
                   newdata = data.frame(age = 0,
                                        female = "Male",
                                        edu = "None",
                                        cohabit = "Other",
                                        discuss = mean(dta$discuss),
                                        att = mean(dta$att),
                                        no_und = mean(dta$no_und),
                                        pol_care = mean(dta$discuss),
                                        str = "None",
                                        app_fix = 0,
                                        prt1 = i,
                                        prt2 = i,
                                        prt3 = i,
                                        app1 = 0,
                                        app2 = 0,
                                        app3 = 0,
                                        w1 = 1/3,
                                        w2 = 1/3,
                                        w3 = 1/3))
  
  # Add to data frame
  p <- data.frame(prt = i,
                  prob_app1 = as.numeric(p_app1),
                  prob_app0 = as.numeric(p_app0))
  
  # Bind to previous predictions
  probs <- rbind(probs,
                 p)
  
}


# Next, we'll remove any rows that include NAs.

probs <-
  probs %>%
  na.omit()


# Next, we'll reorder the dyadic partisanship factor so that appears on the plot in
# the right order.

probs$prt <- 
  probs$prt %>% 
  factor(levels = c("Shared Non-partisan",
                    "Discussant Non-partisan",
                    "Respondent Non-partisan",
                    "Opposing Partisanship",
                    "Shared Partisanship"),
         labels = c("Shared Nonpartisan",
                    "Discussant Nonpartisan",
                    "Respondent Nonpartisan",
                    "Opposing Partisanship",
                    "Shared Partisanship"))


# Then, we'll compute contrasts for each row
probs$cont <- probs$prob_app1 - probs$prob_app0


# Next, we'll convert everything to long format so that we can plot the predicted
# probabilities and the contrasts on seperate facets.

probs <- 
  probs %>% 
  pivot_longer(
    cols = c(prob_app1, prob_app0, cont)
  ) %>% 
  mutate(
    panel =
      case_when(
        name %in% c("prob_app1", "prob_app0") ~ "Predicted Outcome",
        TRUE ~ "Predicted Contrast"
      ) %>% 
      factor(
        levels = c("Predicted Outcome", "Predicted Contrast")
      ),
    name =
      name %>% 
      factor(
        levels = c("prob_app0", "prob_app1", "cont"),
        labels = c("Do not approve", "Approve", "Contrast")
      )
  )


# We'll also summarise these data to provide some useful labels

prob_labs <- 
  probs %>% 
  group_by(prt, name, panel) %>% 
  summarise(value = median(value))


# Now that we have the necessary data, we can plot these results using ggplot()

plot3 <- 
  probs %>% 
  ggplot(
    aes(
      y = prt,
      x = value,
      slab_fill = name,
      slab_colour = name
    )
  ) +
  facet_wrap(~panel) +
  stat_halfeye(.width = .95,
                point_size = 1,
                interval_size = 0.5,
                slab_size = 0.5,
                height = .7,
                normalize = "groups") +
  stat_summary(geom = "point",
               fun = "median",
               colour = "white",
               size = 0.3) +
  geom_text(data = prob_labs,
            aes(label = round(value, 1),
                x = value + .002,
                y = prt),
            family = "Cabin",
            size = 2,
            vjust = 2,
            hjust = 0.7,
            inherit.aes = FALSE) +
  scale_y_discrete(labels = str_wrap(levels(probs$prt), width = 8)) +
  scale_x_continuous(breaks = 0:4) +
  coord_cartesian(xlim = c(0, 4)) +
  labs(x = 'It is every citizen’s duty to vote in an election (0 = "Strongly Disagree", 4 = "Strongly Agree")',
       colour = "",
       fill = "") +
  scale_colour_manual(values = c("#D85A00", "#2368B2", "grey40"),
                      aesthetics = "slab_colour") +
  scale_fill_manual(values = c("#E28745", "#5F91C7", "grey60"),
                    aesthetics = "slab_fill") +
  theme_bailey() +
  theme(legend.position = "bottom",
        legend.margin = margin(t = 0, b = 0, unit = "cm"),
        legend.title = element_blank(),
        legend.text = element_text(size = rel(1)),
        axis.title.y = element_blank(),
        axis.text.y = element_text(hjust= .5))


# Now we can render the plot as a jpeg
jpeg(file = here("_output", "w2_plot3.jpeg"),
     res = 300,
     width = 5.97,
     height = 3.69,
     units = "in")
plot3
dev.off()



# 5. Create tables --------------------------------------------------------

# First, compute the Bayesian R2 for each model

r2_1 <- bayes_R2(m1)
r2_2 <- bayes_R2(m2)
r2_3 <- bayes_R2(m3)


# Table 1

tab1 <- 
  posterior_samples(m1,
                    pars =
                      c("b_", "r_m")) %>% 
  mutate(draw = 1:n()) %>% 
  pivot_longer(
    cols = names(.)[1:30],
    names_to = "effect"
  ) %>% 
  group_by(effect) %>% 
  summarise(
    est = median(value),
    err = sd(value),
    lower = quantile(value, .025),
    upper = quantile(value, .975),
  ) %>% 
  filter(effect != "cor_mmprt1prt2prt3__Intercept__mmcapp1app2app3") %>% 
  mutate(
    effect =
      effect %>% 
      factor(
        levels =
          c("b_Intercept",
            "b_age",
            "b_femaleFemale",
            "b_eduBelowGCSE",
            "b_eduGCSE",
            "b_eduAMlevel",
            "b_eduUndergraduate",
            "b_eduPostgraduate",
            "b_eduOther",
            "b_eduUnknown",
            "b_cohabitCohabiting",
            "b_discuss",
            "b_att",
            "b_no_und",
            "b_pol_care",
            "b_strVerystrong",
            "b_strFairlystrong",
            "b_strNotverystrong",
            "b_app_fix",
            "r_mmprt1prt2prt3[Discussant.Non-partisan,Intercept]",
            "r_mmprt1prt2prt3[Opposing.Partisanship,Intercept]",
            "r_mmprt1prt2prt3[Respondent.Non-partisan,Intercept]",
            "r_mmprt1prt2prt3[Shared.Non-partisan,Intercept]",
            "r_mmprt1prt2prt3[Shared.Partisanship,Intercept]",
            "r_mmprt1prt2prt3[Discussant.Non-partisan,mmcapp1app2app3]",
            "r_mmprt1prt2prt3[Opposing.Partisanship,mmcapp1app2app3]",
            "r_mmprt1prt2prt3[Respondent.Non-partisan,mmcapp1app2app3]",
            "r_mmprt1prt2prt3[Shared.Non-partisan,mmcapp1app2app3]",
            "r_mmprt1prt2prt3[Shared.Partisanship,mmcapp1app2app3]"),
        labels =
          c("Intercept",
            "Age (z-score)",
            "Female",
            "Below GCSE",
            "GCSE",
            "A Level",
            "Undergraduate",
            "Postgraduate",
            "Other",
            "Unknown",
            "Cohabiting",
            "Discuss Politics",
            "Political Attention",
            "Don't Understand",
            "Don't Care",
            "Very Strong PID",
            "Fairly Strong PID",
            "Not Very Strong PID",
            "Discussant Approval",
            "Discussant Non-partisan, Intercept",
            "Opposing Partisanship, Intercept",
            "Respondent Non-partisan, Intercept",
            "Shared Non-partisan, Intercept",
            "Shared Partisanship, Intercept",
            "Discussant Non-partisan, Approval",
            "Opposing Partisanship, Approval",
            "Respondent Non-partisan, Approval",
            "Shared Non-partisan, Approval",
            "Shared Partisanship, Approval")
      ),
    est = sprintf("%.2f", round(est, 2)),
    err = sprintf("%.2f", round(err, 2)),
    lower = sprintf("%.2f", round(lower, 2)),
    upper = sprintf("%.2f", round(upper, 2)),
  ) %>% 
  arrange(effect) %>% 
  add_row(effect = "N, Respondents",
          est = "22,958",
          err = "56,377",
          lower = "",
          upper = "") %>% 
  add_row(effect = "N, Discussants",
          est = "22,958",
          err = "56,377",
          lower = "",
          upper = "") %>% 
  add_row(effect = "Bayesian R2",
          est = format(round(r2_1[1], 2), nsmall = 2),
          err = format(round(r2_1[2], 2), nsmall = 2),
          lower = format(round(r2_1[3], 2), nsmall = 2),
          upper = format(round(r2_1[4], 2), nsmall = 2))

# Save

write.csv(tab1, file = here("_output", "model1.csv"))


# Table 2

tab2 <- 
  posterior_samples(m2,
                    pars =
                      c("b_", "r_m")) %>% 
  mutate(draw = 1:n()) %>% 
  pivot_longer(
    cols = names(.)[1:31],
    names_to = "effect"
  ) %>% 
  group_by(effect) %>% 
  summarise(
    est = median(value),
    err = sd(value),
    lower = quantile(value, .025),
    upper = quantile(value, .975),
  ) %>% 
  filter(effect != "cor_mmprt1prt2prt3__Intercept__mmcapp1app2app3") %>% 
  mutate(
    effect =
      effect %>% 
      factor(
        levels =
          c("b_Intercept",
            "b_age",
            "b_femaleFemale",
            "b_eduBelowGCSE",
            "b_eduGCSE",
            "b_eduAMlevel",
            "b_eduUndergraduate",
            "b_eduPostgraduate",
            "b_eduOther",
            "b_eduUnknown",
            "b_cohabitCohabiting",
            "b_discuss",
            "b_att",
            "b_no_und",
            "b_pol_care",
            "b_duty",
            "b_strVerystrong",
            "b_strFairlystrong",
            "b_strNotverystrong",
            "b_app_fix",
            "r_mmprt1prt2prt3[Discussant.Non-partisan,Intercept]",
            "r_mmprt1prt2prt3[Opposing.Partisanship,Intercept]",
            "r_mmprt1prt2prt3[Respondent.Non-partisan,Intercept]",
            "r_mmprt1prt2prt3[Shared.Non-partisan,Intercept]",
            "r_mmprt1prt2prt3[Shared.Partisanship,Intercept]",
            "r_mmprt1prt2prt3[Discussant.Non-partisan,mmcapp1app2app3]",
            "r_mmprt1prt2prt3[Opposing.Partisanship,mmcapp1app2app3]",
            "r_mmprt1prt2prt3[Respondent.Non-partisan,mmcapp1app2app3]",
            "r_mmprt1prt2prt3[Shared.Non-partisan,mmcapp1app2app3]",
            "r_mmprt1prt2prt3[Shared.Partisanship,mmcapp1app2app3]"),
        labels =
          c("Intercept",
            "Age (z-score)",
            "Female",
            "Below GCSE",
            "GCSE",
            "A Level",
            "Undergraduate",
            "Postgraduate",
            "Other",
            "Unknown",
            "Cohabiting",
            "Discuss Politics",
            "Political Attention",
            "Don't Understand",
            "Don't Care",
            "Duty",
            "Very Strong PID",
            "Fairly Strong PID",
            "Not Very Strong PID",
            "Discussant Approval",
            "Discussant Non-partisan, Intercept",
            "Opposing Partisanship, Intercept",
            "Respondent Non-partisan, Intercept",
            "Shared Non-partisan, Intercept",
            "Shared Partisanship, Intercept",
            "Discussant Non-partisan, Approval",
            "Opposing Partisanship, Approval",
            "Respondent Non-partisan, Approval",
            "Shared Non-partisan, Approval",
            "Shared Partisanship, Approval")
      ),
    est = sprintf("%.2f", round(est, 2)),
    err = sprintf("%.2f", round(err, 2)),
    lower = sprintf("%.2f", round(lower, 2)),
    upper = sprintf("%.2f", round(upper, 2)),
  ) %>% 
  arrange(effect)  %>% 
  add_row(effect = "N, Respondents",
          est = "22,958",
          err = "56,377",
          lower = "",
          upper = "") %>% 
  add_row(effect = "N, Discussants",
          est = "22,958",
          err = "56,377",
          lower = "",
          upper = "") %>% 
  add_row(effect = "Bayesian R2",
          est = format(round(r2_2[1], 2), nsmall = 2),
          err = format(round(r2_2[2], 2), nsmall = 2),
          lower = format(round(r2_2[3], 2), nsmall = 2),
          upper = format(round(r2_2[4], 2), nsmall = 2))

# Save

write.csv(tab2, file = here("_output", "model2.csv"))


# Table 3

tab3 <- 
  posterior_samples(m3,
                    pars =
                      c("b_", "r_m")) %>% 
  mutate(draw = 1:n()) %>% 
  pivot_longer(
    cols = names(.)[1:30],
    names_to = "effect"
  ) %>% 
  group_by(effect) %>% 
  summarise(
    est = median(value),
    err = sd(value),
    lower = quantile(value, .025),
    upper = quantile(value, .975),
  ) %>% 
  filter(effect != "cor_mmprt1prt2prt3__Intercept__mmcapp1app2app3") %>% 
  mutate(
    effect =
      effect %>% 
      factor(
        levels =
          c("b_Intercept",
            "b_age",
            "b_femaleFemale",
            "b_eduBelowGCSE",
            "b_eduGCSE",
            "b_eduAMlevel",
            "b_eduUndergraduate",
            "b_eduPostgraduate",
            "b_eduOther",
            "b_eduUnknown",
            "b_cohabitCohabiting",
            "b_discuss",
            "b_att",
            "b_no_und",
            "b_pol_care",
            "b_strVerystrong",
            "b_strFairlystrong",
            "b_strNotverystrong",
            "b_app_fix",
            "r_mmprt1prt2prt3[Discussant.Non-partisan,Intercept]",
            "r_mmprt1prt2prt3[Opposing.Partisanship,Intercept]",
            "r_mmprt1prt2prt3[Respondent.Non-partisan,Intercept]",
            "r_mmprt1prt2prt3[Shared.Non-partisan,Intercept]",
            "r_mmprt1prt2prt3[Shared.Partisanship,Intercept]",
            "r_mmprt1prt2prt3[Discussant.Non-partisan,mmcapp1app2app3]",
            "r_mmprt1prt2prt3[Opposing.Partisanship,mmcapp1app2app3]",
            "r_mmprt1prt2prt3[Respondent.Non-partisan,mmcapp1app2app3]",
            "r_mmprt1prt2prt3[Shared.Non-partisan,mmcapp1app2app3]",
            "r_mmprt1prt2prt3[Shared.Partisanship,mmcapp1app2app3]"),
        labels =
          c("Intercept",
            "Age (z-score)",
            "Female",
            "Below GCSE",
            "GCSE",
            "A Level",
            "Undergraduate",
            "Postgraduate",
            "Other",
            "Unknown",
            "Cohabiting",
            "Discuss Politics",
            "Political Attention",
            "Don't Understand",
            "Don't Care",
            "Very Strong PID",
            "Fairly Strong PID",
            "Not Very Strong PID",
            "Discussant Approval",
            "Discussant Non-partisan, Intercept",
            "Opposing Partisanship, Intercept",
            "Respondent Non-partisan, Intercept",
            "Shared Non-partisan, Intercept",
            "Shared Partisanship, Intercept",
            "Discussant Non-partisan, Approval",
            "Opposing Partisanship, Approval",
            "Respondent Non-partisan, Approval",
            "Shared Non-partisan, Approval",
            "Shared Partisanship, Approval")
      ),
    est = sprintf("%.2f", round(est, 2)),
    err = sprintf("%.2f", round(err, 2)),
    lower = sprintf("%.2f", round(lower, 2)),
    upper = sprintf("%.2f", round(upper, 2)),
  ) %>% 
  arrange(effect) %>% 
  add_row(effect = "N, Respondents",
          est = "22,958",
          err = "56,377",
          lower = "",
          upper = "") %>% 
  add_row(effect = "N, Discussants",
          est = "22,958",
          err = "56,377",
          lower = "",
          upper = "") %>% 
  add_row(effect = "Bayesian R2",
          est = format(round(r2_3[1], 2), nsmall = 2),
          err = format(round(r2_3[2], 2), nsmall = 2),
          lower = format(round(r2_3[3], 2), nsmall = 2),
          upper = format(round(r2_3[4], 2), nsmall = 2))

# Save

write.csv(tab3, file = here("_output", "model3.csv"))

